Documentation for the analysis of mouse placental RNA-seq data at e7.5, e8.5 and e9.5.

Load essential files + functions + libraries

tpm2 <- read.table("/work/LAS/geetu-lab/hhvu/project1_2/rna-seq/PlacentaRNA-seq/Files/tpmForClustering.txt", header = T)

summ <- function(trans_cluster, tpm2=tpm2) {
  tpm3 <- tpm2
  tpm3 <- cbind(rownames(tpm2), tpm3)
  rownames(tpm3) <- 1:nrow(tpm3)
  colnames(tpm3)[1] <- "transcripts"
  
  #e7.5
  e7.5_table <- data.frame(tpm3$transcripts, rowMeans(tpm3[,c("E7.5_2", "E7.5_3", "E7.5_4", "E7.5_5", "E7.5_6")]))
  e7.5_table$time <- c("e7.5")
  rownames(e7.5_table) <- 1:nrow(e7.5_table)
  colnames(e7.5_table) <- c("transcripts", "mean_cts_scaled", "time")
  
  #e8.5
  e8.5_table <- data.frame(tpm3$transcripts, rowMeans(tpm3[,c("E8.5_1", "E8.5_2", "E8.5_3", "E8.5_4", "E8.5_5", "E8.5_6")]))
  e8.5_table$time <- c("e8.5")
  rownames(e8.5_table) <- 1:nrow(e8.5_table)
  colnames(e8.5_table) <- c("transcripts", "mean_cts_scaled", "time")
  
  #e9.5
  e9.5_table <- data.frame(tpm3$transcripts, rowMeans(tpm3[,c("E9.5_1", "E9.5_2", "E9.5_3", "E9.5_4", "E9.5_5")]))
  e9.5_table$time <- c("e9.5")
  rownames(e9.5_table) <- 1:nrow(e9.5_table)
  colnames(e9.5_table) <- c("transcripts", "mean_cts_scaled", "time")
  
  summary <- rbind(rbind(e7.5_table, e8.5_table), e9.5_table)
  summary <- summary[order(summary$transcripts),]
  summary <- inner_join(summary, trans_cluster, by = c("transcripts" = "name"))
  summary <- inner_join(summary, t2g, by = c("transcripts" = "target_id"))
  
  return(summary)
}

plotClus <- function(summary, title){
  ascl2 <- subset(summary, summary$ext_gene == "Ascl2") #e7.5
  gjb5 <- subset(summary, summary$ext_gene == "Gjb5") #e7.5
  dnmt1 <- subset(summary, summary$ext_gene == "Dnmt1") #e8.5
  itga4 <- subset(summary, summary$ext_gene == "Itga4") #e8.5
  gjb2 <- subset(summary, summary$ext_gene == "Gjb2") #e9.5
  igf2 <- subset(summary, summary$ext_gene == "Igf2") #e9.5
  p <- ggplot(aes(time, mean_cts_scaled), data = summary) +
    geom_line(aes(group = transcripts), alpha = 0.5, colour = "grey77") +
    geom_line(stat = "summary", fun = "median", size = 2,
              aes(group = 1, color = "Group median")) +
    labs(title = title,
         x = "Time point",
         y = "Scaled mean transcript counts", color = "Legend", linetype = "Legend") +
    theme(plot.title = element_text(size = 15, face = "bold"), legend.text=element_text(size=20)) +
    geom_line(data = ascl2, size = 2.5, aes(group = transcripts, color = "Ascl2", linetype = "Ascl2"), alpha = 1) + #e7.5
    geom_line(data = gjb5, size = 2, aes(group = transcripts, color = "Gjb5", linetype = "Gjb5" ), alpha = 1) + #e7.5
    geom_line(data = dnmt1, size = 2, aes(group = transcripts, color = "Dnmt1", linetype = "Dnmt1"), alpha = 1) + #e8.5
    geom_line(data = itga4, size = 2, aes(group = transcripts, color = "Itga4", linetype = "Itga4"), alpha = 1) + #e8.5
    geom_line(data = gjb2, size = 2, aes(group = transcripts, color = "Gjb2", linetype = "Gjb2"), alpha = 1) + #e9.5
    geom_line(data = igf2, size = 2, aes(group = transcripts, color = "Igf2", linetype = "Igf2"), alpha = 1) + #e9.5
    scale_color_manual(name = "Legend", values = c("Ascl2" = "darkolivegreen4", "Gjb5" = "yellow3",
                                                   "Dnmt1" = "dodgerblue4", "Itga4" = "deepskyblue4",
                                                   "Gjb2" = "saddlebrown", "Igf2" = "salmon3",
                                                   "Group median" = "grey22")) + 
    scale_linetype_manual(name = "Legend", values = c("Ascl2" = "solid", "Gjb5" = "solid",
                                                      "Dnmt1" = "twodash", "Itga4" = "solid",
                                                      "Gjb2" = "solid", "Igf2" = "solid",
                                                      "Group median" = "solid")) + 
    guides(linetype=F,
           colour=guide_legend(keywidth = 3, keyheight = 1)) +
    theme(text = element_text(size=15),
          legend.text=element_text(size=15),
          axis.title.x = element_text(size = 15),
          axis.title.y = element_text(size = 15),
          axis.text.x = element_text(angle=0, hjust=0.5, size = 25),
          axis.text.y = element_text(size = 15)) +
    facet_grid(cols = vars(value))
  return(p)
}

library("ggplot2", suppressMessages())
library("dplyr", suppressMessages())
library("tidyverse", suppressMessages())

t2g <- read.table("/work/LAS/geetu-lab/hhvu/project1_2/rna-seq/PlacentaRNA-seq/Files/t2g.txt", header = T, sep = "\t")
t2g <- t2g[order(t2g$target_id),]
coding <- read.table("/work/LAS/geetu-lab/hhvu/project1_2/rna-seq/PlacentaRNA-seq/Files/Mus_musculus_grcm38_coding_transcripts.txt", header = F)

hc <- hclust(dist(tpm2, method = "euclidean"), "complete")
dend <- as.dendrogram(hc)

K = 3

trans_cluster <- cutree(hc, k = 3) %>% enframe()
table(trans_cluster$value)
#> 
#>    1    2    3 
#> 8091 7238 8242
summK3 <- summ(trans_cluster, tpm2)
p <- plotClus(summK3, "Hierarchical Clustering of Transcripts, k = 3")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p

K = 4

trans_cluster <- cutree(hc, k = 4) %>% enframe()
table(trans_cluster$value)
#> 
#>    1    2    3    4 
#> 8091 7238 4501 3741
summK4 <- summ(trans_cluster, tpm2)
p <- plotClus(summK4, "Hierarchical Clustering of Transcripts, k = 4")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p

K = 5

trans_cluster <- cutree(hc, k = 5) %>% enframe()
table(trans_cluster$value)
#> 
#>    1    2    3    4    5 
#> 8091 7238 4501 2532 1209
summK5 <- summ(trans_cluster, tpm2)
p <- plotClus(summK5, "Hierarchical Clustering of Transcripts, k = 5")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p

K = 6

trans_cluster <- cutree(hc, k = 6) %>% enframe()
table(trans_cluster$value)
#> 
#>    1    2    3    4    5    6 
#> 4833 3258 7238 4501 2532 1209
summK6 <- summ(trans_cluster, tpm2)
p <- plotClus(summK6, "Hierarchical Clustering of Transcripts, k = 6")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p

K = 7

trans_cluster <- cutree(hc, k = 7) %>% enframe()
table(trans_cluster$value)
#> 
#>    1    2    3    4    5    6    7 
#> 4833 3258 7238 4501 1349 1209 1183
summK7 <- summ(trans_cluster, tpm2)
p <- plotClus(summK7, "Hierarchical Clustering of Transcripts, k = 7")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p

K = 8

trans_cluster <- cutree(hc, k = 8) %>% enframe()
table(trans_cluster$value)
#> 
#>    1    2    3    4    5    6    7    8 
#> 4833 3258 7238 2429 1349 1209 1183 2072
summK8 <- summ(trans_cluster, tpm2)
p <- plotClus(summK8, "Hierarchical Clustering of Transcripts, k = 8")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p

K = 9

trans_cluster <- cutree(hc, k = 9) %>% enframe()
table(trans_cluster$value)
#> 
#>    1    2    3    4    5    6    7    8    9 
#> 2838 3258 7238 2429 1349 1995 1209 1183 2072
summK9 <- summ(trans_cluster, tpm2)
p <- plotClus(summK9, "Hierarchical Clustering of Transcripts, k = 9")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p

K = 10

trans_cluster <- cutree(hc, k = 10) %>% enframe()
table(trans_cluster$value)
#> 
#>    1    2    3    4    5    6    7    8    9   10 
#> 2838 1173 7238 2429 1349 2085 1995 1209 1183 2072
summK10 <- summ(trans_cluster, tpm2)
p <- plotClus(summK10, "Hierarchical Clustering of Transcripts, k = 10")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
p